package com.trytech.mongoocrawler.server.parser;

import com.trytech.mongoocrawler.server.common.queue.UrlFetcherEventProducer;
import com.trytech.mongoocrawler.server.transport.http.WebResult;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.nio.charset.Charset;

/**
 * Created by coliza on 2017/4/10.
 */
public abstract class HtmlParser<T> {

    /**
     * Converts a fetched web result into a value of type {@code T}.
     *
     * @param webResult   the web result to parse
     * @param urlProducer the URL fetcher event producer made available to the parser
     *                    (NOTE(review): presumably used to enqueue further URLs found
     *                    while parsing; confirm against the concrete subclasses)
     * @return the parsed value; its exact meaning is defined by each subclass
     */
    public abstract T parse(WebResult webResult, UrlFetcherEventProducer urlProducer);

    /**
     * Parses the given HTML text with jsoup and returns the body element of the
     * resulting document.
     *
     * <p>The document's charset is set to UTF-8 before the body is returned.
     *
     * @param html the HTML markup to parse
     * @return the body element of the parsed document
     */
    protected Element getBody(String html) {
        final Document parsed = Jsoup.parse(html);
        final Charset utf8 = Charset.forName("UTF-8");
        // Applied to the whole document; the returned body still belongs to it.
        parsed.charset(utf8);
        return parsed.body();
    }
}
